package cn.dmp.tags

import org.apache.spark.broadcast.Broadcast
import org.apache.spark.sql.Row

/**
  * Keyword tags.
  * Created by Administrator on 2018/4/29.
  */
object Tags4KeyW extends Tags {

  /**
    * Builds keyword tags from one row.
    *
    * The "keywords" column is split on '|'. A piece is kept when its length is
    * in the range [3, 8) and it is not a key of the broadcast map. Each kept
    * piece yields one entry `"K" + piece -> 1`.
    *
    * @param args positional arguments: `args(0)` is cast to [[Row]] and
    *             `args(1)` is cast to `Broadcast[Map[String, Int]]`; only the
    *             keys of the broadcast map are consulted
    * @return the keyword tags for the row; an empty map when the "keywords"
    *         column is null or no piece passes the filter
    */
  override def makeTags(args: Any*): Map[String, Int] = {
    // Cast the positional arguments to the types this tagger expects.
    val row = args(0).asInstanceOf[Row]
    val stopBD = args(1).asInstanceOf[Broadcast[Map[String, Int]]]

    // getAs returns null for a null String column; Option(...) turns that
    // into None so such rows produce no tags instead of throwing an NPE.
    Option(row.getAs[String]("keywords"))
      .map(_.split("\\|"))
      .getOrElse(Array.empty[String])
      .filter(t => t.length >= 3 && t.length < 8 && !stopBD.value.contains(t))
      .map(t => ("K" + t) -> 1)
      .toMap
  }
}
